from IPython.display import Latex, Math
In this post, I will try to explain the DCGAN paper
Before we do that, you may want to look at the idea of GANs and how they work. Here's one such blog. In short, the Generator network tries to generate images that are similar to actual images in order to fool the Discriminator, which in turn tries to tell real images apart from generated ones. Fooling the Discriminator is the objective of the Generator. The input to the Generator is a random vector $Z$ of size, say, 100, from which the Generator tries to produce a fake image. A problem GANs commonly face is called mode collapse, where the Generator produces the same (or nearly the same) fake image no matter which random vector $Z$ it is given.
DCGAN tried to address such training issues and suggested the following:
Architecture guidelines for stable Deep Convolutional GANs
• Replace any pooling layers with strided convolutions (discriminator) and fractional-strided convolutions (generator).
• Use batchnorm in both the generator and the discriminator.
• Remove fully connected hidden layers for deeper architectures.
• Use ReLU activation in generator for all layers except for the output, which uses Tanh.
• Use LeakyReLU activation in the discriminator for all layers.
from __future__ import print_function
#%matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from pathlib import Path
# Pin the seeds of Python's `random` module and of PyTorch so that repeated
# runs start from the same random state.
manualSeed = 999
# manualSeed = random.randint(1, 10000)  # switch to this line for a fresh seed on every run
print("Random Seed: ", manualSeed)
torch.manual_seed(manualSeed)
random.seed(manualSeed)


# ---- Hyper-parameters and paths -------------------------------------------
# Image directory (presumably the aligned CelebA faces; verify against the local setup).
datapath = Path('/media/mano/Data/comicgen/ComicGen_CycleGAN/celeba-dataset/img_align_celeba/img_align_celeba/')

# Number of images per training batch.
batch_size = 128

# As the authors mentioned: weights come from a zero-centered Normal
# distribution with standard deviation 0.02, so we use mean = 0 and sd = 0.02.
weight_sd = 0.02

# Negative slope intended for the discriminator's LeakyReLU.
disc_leakyrelu_rate = 0.2

# As specified in the DCGAN paper, both optimizers are Adam with
# learning rate 0.0002 and Beta1 = 0.5.
adam_lr = 2e-4
beta1 = 0.5
def params_init(m, standard_deviation=0.02):
    """Initialise one module's parameters the way the DCGAN paper prescribes.

    Intended to be passed to ``nn.Module.apply`` so that it visits every
    sub-module of a network.

    * Modules whose class name contains ``Conv`` (this also matches
      ``ConvTranspose2d``) get weights drawn from
      ``Normal(0, standard_deviation)``.
    * Modules whose class name contains ``BatchNorm`` get weights drawn from
      ``Normal(1, standard_deviation)`` and a zero bias.
    * Every other module (e.g. ``nn.Linear``) is left untouched.

    Args:
        m: the module to initialise in place.
        standard_deviation: standard deviation of the normal distribution.
            The default is 0.02, the value the body previously hard-coded
            while ignoring this argument, so default calls are unchanged.
    """
    classname = m.__class__.__name__
    if 'Conv' in classname:
        nn.init.normal_(m.weight.data, 0.0, standard_deviation)
    elif 'BatchNorm' in classname:
        nn.init.normal_(m.weight.data, 1.0, standard_deviation)
        nn.init.constant_(m.bias.data, 0)
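Batch normalization normalizes each channel and then rescales it: $y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} \cdot \gamma + \beta$, where $\gamma$ (the layer's weight) and $\beta$ (the layer's bias) are learnable parameters.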



class Generator(nn.Module):
    """DCGAN generator: turns a latent vector into a 3-channel 64x64 image.

    Data flow (shapes are channels x height x width, batch axis omitted):

        latent vector (nz,)
        Linear            -> 4*4*1024 values, viewed as (1024, 4, 4)
        ConvTranspose2d   -> (512, 8, 8)   -> BatchNorm -> ReLU
        ConvTranspose2d   -> (256, 16, 16) -> BatchNorm -> ReLU
        ConvTranspose2d   -> (128, 32, 32) -> BatchNorm -> ReLU
        ConvTranspose2d   -> (3, 64, 64)   -> Tanh

    Output-size formulas, with n = input height/width, p = padding,
    f = kernel size and s = stride:

        Conv2d:           output = (n + 2p - f) / s + 1
        ConvTranspose2d:  output = (n - 1) * s + f - 2p

    With f=4, s=2, p=1 every transposed convolution doubles the spatial size.

    Args:
        nz: length of the latent input vector (default 100).
    """

    def __init__(self, nz=100):
        super(Generator, self).__init__()
        # Project the latent vector onto the first 4x4 feature map.
        self.inpdense = nn.Linear(nz, 4 * 4 * 1024)
        self.conv1 = nn.ConvTranspose2d(1024, 512, kernel_size=4, stride=2, padding=1)  # 4,4 to 8,8
        self.conv2 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1)  # 8,8 to 16,16
        self.conv3 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1)  # 16,16 to 32,32
        self.conv4 = nn.ConvTranspose2d(128, 3, kernel_size=4, stride=2, padding=1)  # 32,32 to 64,64
        # Batch norm follows every transposed convolution except the output one.
        self.conv1_bn = nn.BatchNorm2d(512)
        self.conv2_bn = nn.BatchNorm2d(256)
        self.conv3_bn = nn.BatchNorm2d(128)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

    def forward(self, inp):
        """Generate images from a batch of latent vectors.

        Args:
            inp: tensor whose last dimension is ``nz``; the code reshapes the
                projection to ``(batch, 1024, 4, 4)`` using ``inp.shape[0]``.

        Returns:
            Tensor of shape ``(batch, 3, 64, 64)`` with Tanh-bounded values.
        """
        inp = self.inpdense(inp)
        bz = inp.shape[0]
        inp = inp.view(bz, 1024, 4, 4)
        inp = self.relu(self.conv1_bn(self.conv1(inp)))
        inp = self.relu(self.conv2_bn(self.conv2(inp)))
        inp = self.relu(self.conv3_bn(self.conv3(inp)))
        # Output layer: no batch norm, Tanh instead of ReLU.
        inp = self.tanh(self.conv4(inp))
        return inp
# Build the generator and run params_init over every sub-module, so the
# convolution and batch-norm layers start from the DCGAN initialisation
# instead of PyTorch's defaults.
generator = Generator()
generator.apply(params_init)
# Bare expression: shows the architecture when this is the last line of a notebook cell.
generator


class Discriminator(nn.Module):
    """DCGAN discriminator: scores a 3-channel 64x64 image as real or fake.

    As mentioned in the guidelines, LeakyReLU is used after every hidden
    convolution. Data flow (channels x height x width, batch axis omitted):

        image (3, 64, 64)
        Conv2d -> (128, 32, 32)               -> LeakyReLU
        Conv2d -> (256, 16, 16) -> BatchNorm  -> LeakyReLU
        Conv2d -> (512, 8, 8)   -> BatchNorm  -> LeakyReLU
        Conv2d -> (1024, 4, 4)  -> BatchNorm  -> LeakyReLU
        Conv2d -> (1, 1, 1)     -> Sigmoid (for binary classification)

    Args:
        negative_slope: slope of the LeakyReLU for negative inputs
            (default 0.2).
    """

    def __init__(self, negative_slope=0.2):
        super(Discriminator, self).__init__()
        self.conv1 = nn.Conv2d(3, 128, kernel_size=4, stride=2, padding=1)  # (64,64,3) to (32,32,128)
        self.conv2 = nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1)  # (32,32,128) to (16,16,256)
        self.conv3 = nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1)  # (16,16,256) to (8,8,512)
        self.conv4 = nn.Conv2d(512, 1024, kernel_size=4, stride=2, padding=1)  # (8,8,512) to (4,4,1024)
        # A 4x4 kernel with no padding collapses the 4x4 map to a single score.
        self.conv5 = nn.Conv2d(1024, 1, kernel_size=4, stride=1, padding=0)  # (4,4,1024) to (1,1,1)
        self.lrelu = nn.LeakyReLU(negative_slope)
        self.sigmoid = nn.Sigmoid()
        # No batch norm on the first convolution's output.
        self.conv2_bn = nn.BatchNorm2d(256)
        self.conv3_bn = nn.BatchNorm2d(512)
        self.conv4_bn = nn.BatchNorm2d(1024)

    def forward(self, inp):
        """Return the probability that each image is real.

        Args:
            inp: image batch; the layer arithmetic above assumes
                ``(batch, 3, 64, 64)``.

        Returns:
            Tensor of shape ``(batch, 1, 1, 1)`` with Sigmoid-bounded values.
        """
        inp = self.lrelu(self.conv1(inp))
        inp = self.lrelu(self.conv2_bn(self.conv2(inp)))
        inp = self.lrelu(self.conv3_bn(self.conv3(inp)))
        inp = self.lrelu(self.conv4_bn(self.conv4(inp)))
        inp = self.sigmoid(self.conv5(inp))
        return inp
# Build the discriminator and run params_init over every sub-module, so the
# convolution and batch-norm layers start from the DCGAN initialisation
# instead of PyTorch's defaults.
discriminator = Discriminator()
discriminator.apply(params_init)
# Bare expression: shows the architecture when this is the last line of a notebook cell.
discriminator
$\mathrm{BCELoss} = -\left(y \log{p} + (1-y) \log{(1-p)}\right)$
For a binary classification problem, $y \in \{0, 1\}$. Let $p$ be the predicted probability that an instance belongs to the 1st class. Then,
For 0th class, $loss = -\left(0 \cdot \log{p} + (1-0) \cdot \log{(1-p)}\right)$ ==> $-\log{(1-p)}$
For 1st class, $loss = -\left(1 \cdot \log{p} + (1-1) \cdot \log{(1-p)}\right)$ ==> $-\log{p}$
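Let $D(x)$ be the probability, as estimated by the Discriminator, that an object $x$ is real.
Given that $x$ is a real object and $G(z)$ is a fake object, the Discriminator should maximize $D(x)$ and reduce $D(G(z))$, which is the same as maximizing $1 - D(G(z))$. In terms of the BCE loss, it minimizes $-\log{D(x)} - \log{(1 - D(G(z)))}$.
The Generator should maximize $D(G(z))$, i.e. minimize $-\log{D(G(z))}$; it has no influence on $D(x)$, which depends only on real objects.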
# Binary cross-entropy is the loss shared by both networks.
criterion = nn.BCELoss()

# One Adam optimizer per network, both with the same learning rate and betas.
optimizerD = optim.Adam(
    discriminator.parameters(),
    betas=(beta1, 0.999),
    lr=adam_lr,
)
optimizerG = optim.Adam(
    generator.parameters(),
    betas=(beta1, 0.999),
    lr=adam_lr,
)
# Length of the latent vector z fed to the generator.
nz = 100

import multiprocessing

# Root folder handed to ImageFolder.
dataroot = '/media/mano/Data/comicgen/ComicGen_CycleGAN/celeba-dataset/img_align_celeba/'
# Images are resized and center-cropped to image_size x image_size.
image_size = 64
# One loader worker per CPU core (a fixed value such as 2 also works).
workers = multiprocessing.cpu_count()
# Number of GPUs to use; 0 selects the CPU when the device is chosen.
ngpu = 1

# Preprocessing: resize, center-crop, convert to tensor, then normalize each
# channel with mean 0.5 and std 0.5.
image_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Dataset read from the folder layout under dataroot.
dataset = dset.ImageFolder(root=dataroot, transform=image_transform)

# Shuffled mini-batch loader over the dataset.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
)
# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

# Plot some training images. The grid is built on the CPU from the first 64
# images only; the batch does not need to visit the device just to be drawn.
real_batch = next(iter(dataloader))
plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Training Images")
plt.imshow(np.transpose(vutils.make_grid(real_batch[0][:64], padding=2, normalize=True),(1,2,0)))

# Create batch of latent vectors that we will use to visualize
# the progression of the generator
fixed_noise = torch.randn(64, nz, device=device)

# Establish convention for real and fake labels during training.
# They are floats because BCELoss requires a floating-point target, and a
# tensor filled from an integer value can come out as an integer tensor.
real_label = 1.
fake_label = 0.

# Move both networks to the selected device.
generator = generator.to(device)
discriminator = discriminator.to(device)
num_epochs = 3

# Training Loop
# Lists to keep track of progress
img_list = []   # grids of generator output on fixed_noise, one per snapshot
G_losses = []   # generator loss, one entry per iteration
D_losses = []   # discriminator loss, one entry per iteration
iters = 0       # global iteration counter across epochs

print("Starting Training Loop...")
# For each epoch
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        discriminator.zero_grad()
        # Format batch
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        # Explicit float dtype: BCELoss needs a floating-point target, and
        # torch.full infers an integer dtype from an integer fill value.
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = discriminator(real_cpu).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, nz, device=device)
        # Generate fake image batch with G
        fake = generator(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D; detach() keeps this backward pass
        # from propagating into the generator.
        output = discriminator(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch (accumulated with the real-batch gradients)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Total discriminator loss, kept for reporting
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        generator.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = discriminator(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())

        # Check how the generator is doing by saving G's output on fixed_noise
        # every 500 iterations and on the very last batch of the last epoch.
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            with torch.no_grad():
                fake = generator(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))

        iters += 1
# Show every saved grid of generator output, oldest first.
for snapshot in img_list:
    plt.figure(figsize=(8,8))
    plt.axis("off")
    # These grids come from the generator, not from the training set.
    plt.title("Generated Images")
    plt.imshow(np.transpose(snapshot.cpu(),(1,2,0)))

# Show one snapshot on its own: the fifth one, or the last available one
# when fewer than five were saved. Skipped if no snapshot exists.
if img_list:
    plt.figure(figsize=(8,8))
    plt.axis("off")
    plt.title("Generated Images")
    plt.imshow(np.transpose(img_list[min(4, len(img_list) - 1)].cpu(),(1,2,0)))
